# This file is part of Sonedyan.
#
# Sonedyan is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation;
# either version 3 of the License, or (at your option) any
# later version.
#
# Sonedyan is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License
# along with Sonedyan.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright (C) 2009-2012 Jimmy Dubuisson <jimmy.dubuisson@gmail.com>

#
# get a sample of the set of 1grams time series
#
# usage: python 6_sample_normalized-filtered-1grams.py <sample-size>
#

import sys, random

# Default input and output files, resolved against the current working
# directory.  Each non-blank input line is treated as one record.
INPUT_PATH = "normalized-filtered-1grams.txt"
OUTPUT_PATH = "sample-normalized-filtered-1grams.txt"


def _read_records(path):
    """Return every non-blank line of *path*, stripped of surrounding whitespace.

    Blank lines are skipped instead of being treated as end-of-file, so a
    stray empty line in the middle of the input does not truncate the data.
    """
    with open(path, "r") as fd:
        stripped = (line.strip() for line in fd)
        return [line for line in stripped if line]


def main(argv, input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Write a random sample of the input records to the output file.

    Arguments:
    argv        -- command-line style list; argv[1] is the sample size
                   (extra arguments are ignored).
    input_path  -- file to read the records from.
    output_path -- file the sampled records are written to, one per line.

    The sample is drawn without replacement and written in the same relative
    order as the records appear in the input.

    Returns 0 on success.  Returns a non-zero status, after printing a
    message to stderr and without touching the output file, when the sample
    size is missing, is not an integer, is negative, or exceeds the number
    of records.
    """
    if len(argv) < 2:
        sys.stderr.write(
            "usage: python 6_sample_normalized-filtered-1grams.py "
            "<sample-size>\n"
        )
        return 2

    try:
        sample_size = int(argv[1])
    except ValueError:
        sys.stderr.write(
            "error: sample size must be an integer, got %r\n" % (argv[1],)
        )
        return 2

    records = _read_records(input_path)

    # Validate before opening the output so a bad request never clobbers an
    # existing sample file.
    if sample_size < 0 or sample_size > len(records):
        sys.stderr.write(
            "error: sample size %d is outside the range 0..%d\n"
            % (sample_size, len(records))
        )
        return 1

    # Sample indices rather than records so that sorting restores file order.
    chosen = sorted(random.sample(range(len(records)), sample_size))

    with open(output_path, "w") as fd:
        fd.writelines(records[i] + "\n" for i in chosen)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
